import sys
from urllib.parse import urljoin

import requests
# import re
from lxml import etree
# Index page of famous quotes; its "type2" panel holds one link per author.
url = 'https://www.gushiwen.cn/mingjus/'

# Browser-like User-Agent sent with every request.
headers = {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36'
}

# Seconds to wait for a connection / response before giving up. Without a
# timeout, requests blocks forever on a server that stops answering.
REQUEST_TIMEOUT = 10


def fetch_tree(session, page_url):
    """Download *page_url* and return its body parsed by ``etree.HTML``.

    Args:
        session: ``requests.Session`` used to issue the GET request.
        page_url: Absolute URL of the page to download.

    Returns:
        Whatever ``etree.HTML`` produces for the response text.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx status (via ``raise_for_status``).
    """
    response = session.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of scraping an error page as if it were content.
    response.raise_for_status()
    return etree.HTML(response.text)


# One session for the whole crawl so the connection to the host is reused.
with requests.Session() as session:
    # A failure here is fatal: without the index page there is nothing to crawl.
    tree = fetch_tree(session, url)

    # Collect every author link from the index page.
    authors = []
    if tree is not None:
        authors = tree.xpath('//div[@id="type2"]//div[@class="sright"]/a')

    for a in authors:
        name_nodes = a.xpath('./text()')
        hrefs = a.xpath('./@href')
        # Skip anchors without direct text or an href instead of crashing
        # the whole crawl with an IndexError.
        if not name_nodes or not hrefs:
            continue

        name = name_nodes[0].strip()
        # urljoin resolves root-relative, page-relative and absolute hrefs.
        detail_url = urljoin(url, hrefs[0])

        try:
            detail_tree = fetch_tree(session, detail_url)
        except requests.RequestException as exc:
            # One bad author page should not abort the remaining authors.
            print(f'skip {detail_url}: {exc}', file=sys.stderr)
            continue
        # NOTE(review): guards against the parser yielding no tree for an
        # empty body - confirm against the lxml version in use.
        if detail_tree is None:
            continue

        poems = detail_tree.xpath(
            '//div[@class="left"]//div[@class="sons"]/div[@class="cont"]'
        )
        for poem in poems:
            lines = poem.xpath('./a[1]/text()')
            # Entries whose first link carries no text have no quote to report.
            if not lines:
                continue
            p = {
                '诗人':name,
                '诗句':lines[0],
            }
            print(p)